{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "df953e1a-b48a-4eda-b099-c89c9449fdad",
   "metadata": {},
   "source": [
    "## Gravitino access control"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "This demo shows that authorizing the Hive Catalog through Gravitino and then using Spark to query this hive datasource authenticates the user's operation, allowing or denying the user's operation. \n",
    " You can log in to the Apache Ranger admin service to see the permissions.\n",
    "\n",
    "+ Apache Ranger admin service: http://localhost:6080/, the login user name is `admin` and the password is `rangerR0cks!`.\n",
    "+ Apache Gravitino access control document: https://gravitino.apache.org/docs/latest/security/access-control"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "d75740e99c5ed90e"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Add the manager to the metalake"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "42a9fa39f5d9ef03"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "headers = {\n",
    "    'Accept': 'application/vnd.gravitino.v1+json',\n",
    "    'Content-Type': 'application/json',\n",
    "}\n",
    "\n",
    "data = {\n",
    "    \"name\": \"manager\"\n",
    "}\n",
    "\n",
    "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n",
    "\n",
    "# print the response text (the content of the requested file):\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b09c15849e20a095"
  },
  {
   "cell_type": "markdown",
   "source": [
    "\n",
    "\n",
    "### Create a Hive catalog with Ranger authorization"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "cd2dc99e370d1c71"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/catalogs\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": \"Basic bWFuYWdlcjoxMjM=\",\n",
    "}\n",
    "data = {\n",
    "    \"name\": \"catalog_hive_ranger\",\n",
    "    \"type\": \"RELATIONAL\",\n",
    "    \"provider\": \"hive\",\n",
    "    \"comment\": \"comment\",\n",
    "    \"properties\": {\n",
    "        \"metastore.uris\": \"thrift://hive:9083\",\n",
    "        \"authorization-provider\": \"ranger\",\n",
    "        \"authorization.ranger.admin.url\": \"http://ranger:6080\",\n",
    "        \"authorization.ranger.auth.type\": \"simple\",\n",
    "        \"authorization.ranger.username\": \"admin\",\n",
    "        \"authorization.ranger.password\": \"rangerR0cks!\",\n",
    "        \"authorization.ranger.service.type\": \"HadoopSQL\",\n",
    "        \"authorization.ranger.service.name\": \"hiveDev\"\n",
    "    }\n",
    "}\n",
    "\n",
    "response = requests.post(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "print(response.text)\n"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c39ff0b34e25c2e4"
  },
  {
   "cell_type": "markdown",
   "id": "6cdaf5b0-18e1-45df-8e8f-13376e41c421",
   "metadata": {},
   "source": [
    "### Install PySpark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "380aa111-a955-4a48-835f-8a5402bf1a4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark\n",
    "import os\n",
    "from pyspark.sql import SparkSession\n",
    "os.environ['HADOOP_USER_NAME']=\"manager\"\n",
    "gravitino_connector_jar = os.getenv('SPARK_CONNECTOR_JAR')\n",
    "\n",
    "spark = SparkSession.builder \\\n",
    "    .appName(\"PySpark SQL Example\") \\\n",
    "    .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
    "    .config(\"spark.jars\", f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n",
    "                           /tmp/gravitino/packages/{gravitino_connector_jar},\\\n",
    "                           /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n",
    "    .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
    "    .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
    "    .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n",
    "    .config(\"spark.locality.wait.node\", \"0\") \\\n",
    "    .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n",
    "    .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n",
    "    .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n",
    "    .enableHiveSupport() \\\n",
    "    .getOrCreate()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Show databases list under the catalog_hive"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "499295c00807fb0d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d70e6c04-bb61-4b5b-8525-41a4a5a34b54",
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.sql(\"USE catalog_hive_ranger\")\n",
    "spark.sql(\"SHOW DATABASES\").show()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create database access control"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e33c1f5bdee0b3b1"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2fae330-e932-4482-9622-bcc454844ff8",
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.sql(\"CREATE DATABASE IF NOT EXISTS access_control;\")\n",
    "spark.sql(\"USE catalog_hive_ranger\")\n",
    "spark.sql(\"SHOW DATABASES\").show()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create table customers"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f416641cedd311ca"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c34d2bfc-f13a-4dcc-ae40-f2a2874bfffa",
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.sql(\"USE access_control;\")\n",
    "spark.sql(\"CREATE TABLE customers (customer_id int, customer_name string, customer_email string);\")\n",
    "spark.sql(\"SHOW TABLES\").show()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Select and insert data for the table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "68adabbd976e106b"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n",
    "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n",
    "spark.sql(\"SELECT * FROM customers\").show()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7113e44ad213ff45"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### You should click the jupyter button to restart the notebook, we will start a new spark context with user lisa"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "ee84f44711c7a939"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pyspark\n",
    "import os\n",
    "from pyspark.sql import SparkSession\n",
    "os.environ['HADOOP_USER_NAME']=\"lisa\"\n",
    "gravitino_connector_jar = os.getenv('SPARK_CONNECTOR_JAR')\n",
    "\n",
    "spark = SparkSession.builder \\\n",
    "    .appName(\"PySpark SQL Example\") \\\n",
    "    .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
    "    .config(\"spark.jars\", f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n",
    "                           /tmp/gravitino/packages/{gravitino_connector_jar},\\\n",
    "                           /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n",
    "    .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
    "    .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
    "    .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n",
    "    .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n",
    "    .config(\"spark.locality.wait.node\", \"0\") \\\n",
    "    .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n",
    "    .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n",
    "    .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n",
    "    .enableHiveSupport() \\\n",
    "    .getOrCreate()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "81f1b47f026aa59d"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Add Spark execute user `lisa` into Gravitino\n",
    "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#add-a-user"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8530bac14f93ef26"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "headers = {\n",
    "    'Accept': 'application/vnd.gravitino.v1+json',\n",
    "    'Content-Type': 'application/json',\n",
    "}\n",
    "\n",
    "data = {\n",
    "    \"name\": \"lisa\"\n",
    "}\n",
    "\n",
    "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n",
    "\n",
    "# print the response text (the content of the requested file):\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "25ca43caa7aa5a30"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create a developer role"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "23b6521d01362ba2"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "data = {\n",
    "    \"name\": \"developer\",\n",
    "    \"properties\": {\"k1\": \"v1\"},\n",
    "    \"securableObjects\": [\n",
    "        {\n",
    "            \"fullName\": \"catalog_hive_ranger\",\n",
    "            \"type\": \"CATALOG\",\n",
    "            \"privileges\": [\n",
    "                {\n",
    "                    \"name\": \"USE_CATALOG\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                }\n",
    "            ]\n",
    "        },\n",
    "        {\n",
    "            \"fullName\": \"catalog_hive_ranger.access_control\",\n",
    "            \"type\": \"SCHEMA\",\n",
    "            \"privileges\": [\n",
    "                {\n",
    "                    \"name\": \"USE_SCHEMA\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                },\n",
    "                {\n",
    "                    \"name\": \"CREATE_TABLE\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                },\n",
    "                {\n",
    "                    \"name\": \"MODIFY_TABLE\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                },\n",
    "                {\n",
    "                    \"name\": \"SELECT_TABLE\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                }\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "response = requests.post(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "60527c15252f9bc8"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Grant role to Spark execute user lisa\n",
    "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#grant-roles-to-a-user"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "ad03479cbb062c80"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "data = {\n",
    "    \"roleNames\": [\"developer\"]\n",
    "}\n",
    "\n",
    "response = requests.put(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "# print status code and response text\n",
    "print(response.status_code)\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "822c265e4981cf8d"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Select and insert data for the table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "342b2b8b96235f06"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "spark.sql(\"USE catalog_hive_ranger;\")\n",
    "spark.sql(\"USE access_control;\")\n",
    "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n",
    "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n",
    "spark.sql(\"SELECT * FROM customers\").show()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "968b1f56d7cf3167"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create another table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e633d159e006740d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "spark.sql(\"CREATE TABLE another_customers (customer_id int, customer_name string, customer_email string);\")\n",
    "spark.sql(\"SHOW TABLES;\").show()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e2b59346fbf058ce"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Succeed to drop his table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "1e5624cfef2048fd"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "spark.sql(\"DROP TABLE another_customers;\")\n",
    "spark.sql(\"SHOW TABLES;\").show()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b62051095545975"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Fail to drop others' table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "161c5822bf5b3f67"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "from py4j.protocol import Py4JJavaError\n",
    "\n",
    "try:\n",
    "    spark.sql(\"DROP TABLE customers;\")\n",
    "except Py4JJavaError as e:\n",
    "    print(\"An error occurred: \", e.java_exception)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "3d8c1a7563009cdc"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Change another role for the user"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "1a51eaa11e56d2f7"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Revoke role from Spark execute user lisa\n",
    "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#revoke-roles-from-a-user"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5a6bd716b808b53d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/revoke\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "data = {\n",
    "    \"roleNames\": [\"developer\"]\n",
    "}\n",
    "\n",
    "response = requests.put(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "# print status code and response text\n",
    "print(response.status_code)\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "21229241aa84650a"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create a analyst role"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "947303c40f7e8835"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "data = {\n",
    "    \"name\": \"analyst\",\n",
    "    \"properties\": {\"k1\": \"v1\"},\n",
    "    \"securableObjects\": [\n",
    "        {\n",
    "            \"fullName\": \"catalog_hive_ranger\",\n",
    "            \"type\": \"CATALOG\",\n",
    "            \"privileges\": [\n",
    "                {\n",
    "                    \"name\": \"USE_CATALOG\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                }\n",
    "            ]\n",
    "        },\n",
    "        {\n",
    "            \"fullName\": \"catalog_hive_ranger.access_control\",\n",
    "            \"type\": \"SCHEMA\",\n",
    "            \"privileges\": [\n",
    "                {\n",
    "                    \"name\": \"USE_SCHEMA\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                },\n",
    "                {\n",
    "                    \"name\": \"SELECT_TABLE\",\n",
    "                    \"condition\": \"ALLOW\"\n",
    "                }\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "response = requests.post(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7117a353c5843a1e"
  },
  {
   "cell_type": "markdown",
   "source": [
    "###  Grant a analyst to the user"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "78a722b6539a73ba"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n",
    "headers = {\n",
    "    \"Accept\": \"application/vnd.gravitino.v1+json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "data = {\n",
    "    \"roleNames\": [\"analyst\"]\n",
    "}\n",
    "\n",
    "response = requests.put(url, headers=headers, data=json.dumps(data))\n",
    "\n",
    "# print status code and response text\n",
    "print(response.status_code)\n",
    "print(response.text)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "eab62ec65b36dbc6"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Succeed to select data from the table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "251a5c4001b7ecb3"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "spark.sql(\"SELECT * FROM customers\").show()"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "9cfb4a73cd36e529"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Fail to insert the data to the table"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "450de05e4252f16"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "from py4j.protocol import Py4JJavaError\n",
    "\n",
    "try:\n",
    "    spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n",
    "    spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n",
    "except Py4JJavaError as e:\n",
    "    print(\"An error occurred: \", e.java_exception)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "de7d35aa98cb3001"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
